suppressPackageStartupMessages({
library(Seurat)
library(dplyr)
library(data.table)
library(ggplot2)
library(patchwork)
library(harmony)
library(entropy)
library(presto)
library(singlecellmethods)
library(lme4)
library(purrr)
library(pheatmap)
library(rcna)
library(glue)
library(ggthemes)
})
source("myfun.r")
# path list
allDat.dir <- "/data/brennerlab/Shani/projects/Treg/analysis/"
# AMP data options
# amp2.path <- paste0(allDat.dir, "AMP2_RA_tissue/AMP2_tissue_Treg_orig.rds") # AMP original Tregs annotations
amp2.path <- paste0(allDat.dir, "AMP2_RA_tissue/AMP2_tissue_Treg_filtered_QC.rds") # Tregs after additional QC filtering
# amp2.path <- paste0(allDat.dir, "AMP2_RA_tissue/AMP2_tissue_Treg_filtered_refined.rds") # Refined Treg annotations (permissive)
# amp2.path <- paste0(allDat.dir, "AMP2_RA_tissue/AMP2_tissue_Treg_alternative.rds") # My alternative Treg annotation
amp2.extra.metadata.path <- "/data/brennerlab/Shani/projects/AMP_Phase_2/treatment assigned metadata_clin_donor_singlecell.csv"
amprep.path <- paste0(allDat.dir, "AMPrep_syn_bl/AMPrep_ST_BL_filtered_Tregs.rds")
sf.path <- paste0(allDat.dir, "SF_sf_bl/SF_BL_filtered.rds")
# ctrl.bl.path <- paste0(allDat.dir, "Ctrl_bl/Ctrl_BL_filtered_Tregs.rds")
ctrl.bl.path <- paste0(allDat.dir, "Ctrl_bl/Ctrl_BL_Luo_filtered_Tregs.rds")
#TODO: control blood
save.path <- "/data/brennerlab/Shani/projects/Treg/analysis/integrated/"
amp2 <- readRDS(amp2.path)
amprep <- readRDS(amprep.path)
sf <- readRDS(sf.path)
ctrl.bl <- readRDS(ctrl.bl.path)
amp2@meta.data%>% colnames()
amp2@meta.data %>% head(2)
| orig.ident | nCount_RNA | nFeature_RNA | sample | cluster | cell_type | donor | fibro | Tfilter | nUMI | ⋯ | krenn | subject_id | CTAP | UMAP_1.amp | UMAP_2.amp | humap_fgraph_res.0.5 | seurat_clusters | humap_fgraph_res.0.8 | humap_fgraph_res.1 | sub.cluster | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <fct> | <dbl> | <int> | <chr> | <chr> | <chr> | <chr> | <lgl> | <lgl> | <dbl> | ⋯ | <dbl> | <chr> | <chr> | <dbl> | <dbl> | <fct> | <fct> | <fct> | <fct> | <chr> | |
| BRI-399_AAAGGTAGTGCAGGAT | BRI-399 | 4101 | 1492 | BRI-399 | T-8: CD4+ CD25-high Treg | T cell | BRI-399 | TRUE | FALSE | 4101 | ⋯ | NA | 301-0267 | NA | -1.563939 | 2.577495 | 3 | 3 | 4 | 2 | 2 |
| BRI-399_AACGGGATCTTGTTAC | BRI-399 | 3568 | 1454 | BRI-399 | T-8: CD4+ CD25-high Treg | T cell | BRI-399 | TRUE | FALSE | 3568 | ⋯ | NA | 301-0267 | NA | -1.630056 | 1.672564 | 4 | 4 | 9 | 4 | 4 |
amp2.extra.metadata <- read.csv(amp2.extra.metadata.path)
# amp2.extra.metadata <- amp2.extra.metadata %>% mutate(subject_id = gsub("-", "_", subject_id))
amp2.extra.metadata%>% head
| subject_id | pipeline_date | site | treatment | biopsied | sex | age | CDAI | disease_duration | tissue_type | krenn_inflammation | single_cell_tech | cell_count_to_10x | mRNA_run | protein_run | atac_run | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <int> | <chr> | <dbl> | <chr> | <dbl> | <chr> | <chr> | <chr> | <chr> | <chr> | |
| 1 | 301-0267 | 8/6/2019 | University of Pittsburgh | OA | Female | 70 | NA | Arthroplasty | NA | CITEseq + flow/bulk + re-freeze | 15,000 | BRI-399 | BRI-400 | BRI-448 | ||
| 2 | 300-0302 | 8/6/2019 | Cedars | Naïve | Knee | Female | 77 | 15 | 2.1 | Biopsy | 2.33 | CITEseq | 15,000 | BRI-401 | BRI-402 | |
| 3 | 300-0150 | 8/6/2019 | University of Rochester | methotrexate failure | Wrist | Female | 28 | 22 | 1.5 | Synovectomy | 2.67 | CITEseq + flow/bulk + re-freeze | 15,000 | BRI-403 | BRI-404 | BRI-449 |
| 4 | 300-0310 | 8/6/2019 | Cedars | TNF failure | Wrist | Female | 62 | 24.5 | 16.0 | Biopsy | 0.33 | CITEseq | 2,200 | BRI-405 | BRI-406 | |
| 5 | 300-2663 | 8/6/2019 | Columbia University | methotrexate failure | Wrist | Female | 44 | 40 | 6.9 | Biopsy | 3.00 | CITEseq | 3,800 | BRI-407 | BRI-408 | |
| 6 | 300-0460 | 8/6/2019 | Northwestern | methotrexate failure | Wrist | Female | 68 | 11 | 3.8 | Biopsy | 3.00 | CITEseq | 5,000 | BRI-409 | BRI-410 |
amp2.meta <- amp2@meta.data %>% select(donor) %>% left_join(., amp2.extra.metadata%>% select(c(mRNA_run, biopsied)),by = join_by(donor == mRNA_run))
amp2$biopsied <- amp2.meta$biopsied
amp2@meta.data[2434:2436,]
| orig.ident | nCount_RNA | nFeature_RNA | sample | cluster | cell_type | donor | fibro | Tfilter | nUMI | ⋯ | subject_id | CTAP | UMAP_1.amp | UMAP_2.amp | humap_fgraph_res.0.5 | seurat_clusters | humap_fgraph_res.0.8 | humap_fgraph_res.1 | sub.cluster | biopsied | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <fct> | <dbl> | <int> | <chr> | <chr> | <chr> | <chr> | <lgl> | <lgl> | <dbl> | ⋯ | <chr> | <chr> | <dbl> | <dbl> | <fct> | <fct> | <fct> | <fct> | <chr> | <chr> | |
| BRI-515_TTGGTTTCACGGTGTC | BRI-515 | 1849 | 901 | BRI-515 | T-9: CD4+ CD25-low Treg | T cell | BRI-515 | FALSE | TRUE | 1849 | ⋯ | 300-0504 | T + F | -1.737160 | 1.268877 | 0 | 0 | 0 | 0 | 0 | Ankle |
| BRI-515_TTGTGTTCAAATTGCC | BRI-515 | 5884 | 1923 | BRI-515 | T-8: CD4+ CD25-high Treg | T cell | BRI-515 | FALSE | TRUE | 5884 | ⋯ | 300-0504 | T + F | -2.215138 | 1.919830 | 2 | 2 | 3 | 1 | 1 | Ankle |
| BRI-515_TTGTGTTTCGCGCCAA | BRI-515 | 4525 | 1311 | BRI-515 | T-8: CD4+ CD25-high Treg | T cell | BRI-515 | FALSE | TRUE | 4525 | ⋯ | 300-0504 | T + F | -1.621291 | 2.516766 | 0 | 0 | 0 | 1 | 1 | Ankle |
amp2$orig.ident <- "AMP2"
amp2$sample.library <- amp2$sample
amp2$donorID <- amp2$subject_id
amp2$donorID <- gsub("-", "_", amp2$donorID)
amp2$percent.mito <- amp2$percent_mito
amp2$tissue <- "Syn"
amp2$tissue <- ifelse(amp2$treatment == "OA", "Syn_OA", amp2$tissue)
amp2$tissue_collection <- amp2$tissue.type
amp2@meta.data <- amp2@meta.data %>% select(c(-cluster, -cell_type, -donor, -fibro, -Tfilter, -nUMI, -nGene, -Tfilter2, -tissue.type, -subject_id,
-UMAP_1.amp, -UMAP_2.amp, -humap_fgraph_res.0.8, -seurat_clusters, -percent_mito)) #
colnames(amp2@meta.data)
amprep@meta.data%>% colnames()
amprep@meta.data %>% head(2)
| orig.ident | nCount_RNA | nFeature_RNA | nCount_ADT | nFeature_ADT | study_ID | AMP_ID | method | tissue_location | age | ⋯ | tissue | ct | seq_repeat | percent.mito | scDblFinder.class | scDblFinder.score | sub.cluster | humap_fgraph_res.0.5 | seurat_clusters | humap_fgraph_res.0.8 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <dbl> | <int> | <dbl> | <int> | <chr> | <chr> | <chr> | <chr> | <int> | ⋯ | <chr> | <chr> | <chr> | <dbl> | <fct> | <dbl> | <chr> | <fct> | <fct> | <fct> | |
| 300_0150_PBL_BT_5_AACACGTAGCGTTCCG-1 | 300_0150_PBL_BT_5 | 34584 | 5166 | 2599 | 5 | RA01 | 300_0150 | Synovectomy | Wrist | 28 | ⋯ | PBL | BT | 1 | 3.336803 | singlet | 0.188737959 | 14 | 3 | 4 | 4 |
| 300_0150_PBL_BT_5_AACTCAGCATCGACGC-1 | 300_0150_PBL_BT_5 | 4841 | 1703 | 1108 | 6 | RA01 | 300_0150 | Synovectomy | Wrist | 28 | ⋯ | PBL | BT | 1 | 2.210287 | singlet | 0.006859329 | 7 | 2 | 1 | 1 |
amprep$orig.ident <- "AMPrep"
amprep$sample.library <- amprep$sampleName
amprep$donorID <- amprep$AMP_ID
amprep$treatment <- amprep$treatment_group
amprep$tissue_collection <- amprep$method
amprep$biopsied <- amprep$tissue_location
amprep$disease.duration <- amprep$RA_duration
amprep$krenn <- amprep$krenn_inflammation
amprep$CDAI <- amprep$cdai_V0
amprep$tissue <- ifelse(amprep$tissue == "PBL", "PBMC.RA", amprep$tissue)
amprep@meta.data <- amprep@meta.data %>% select(c(-study_ID, -scDblFinder.class, -scDblFinder.score, -RA_duration, -krenn_inflammation, -cdai_V0, -treatment_group,
-site, -library, -sampleName, -ct, -humap_fgraph_res.0.8, -seurat_clusters, -AMP_ID))
colnames(amprep@meta.data)
sf@meta.data%>% colnames()
sf@meta.data %>% head(2)
| orig.ident | nCount_RNA | nFeature_RNA | nCount_ADT | nFeature_ADT | nCount_HTO | nFeature_HTO | sampleID | percent.mito | HTO_maxID | ⋯ | donorID | tissue | sorted.population | soting.marker | humap_fgraph_res.1 | seurat_clusters | scDblFinder.class | scDblFinder.score | sub.cluster | humap_fgraph_res.0.8 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <dbl> | <int> | <dbl> | <int> | <dbl> | <int> | <chr> | <dbl> | <chr> | ⋯ | <chr> | <chr> | <chr> | <chr> | <fct> | <fct> | <fct> | <dbl> | <chr> | <fct> | |
| BRI-1426__AAACCTGAGAGTAAGG-1 | SF | 4700 | 1697 | 610 | 99 | 169 | 5 | BRI-1426 | 5.319149 | Hashtag5 | ⋯ | DiSF013 (aka P012) | SFMC | R2 (Treg) | CD127- | 0 | 0 | singlet | 0.035700221 | 3 | 0 |
| BRI-1426__AAACCTGAGGCAGTCA-1 | SF | 6223 | 2189 | 703 | 101 | 323 | 5 | BRI-1426 | 1.462317 | Hashtag5 | ⋯ | DiSF013 (aka P012) | SFMC | R2 (Treg) | CD127- | 9 | 9 | singlet | 0.002335656 | 1 | 9 |
sf$orig.ident <- "SF.BL"
sf$sample.library <- sf$sampleID
sf$tissue <- ifelse(sf$tissue == "PBMC", "PBMC.RA", sf$tissue)
sf@meta.data <- sf@meta.data %>% select(c(-Diagnosis, -sorted.population, -soting.marker, -humap_fgraph_res.0.8, -seurat_clusters, -sampleID))
colnames(sf@meta.data)
ctrl.bl@meta.data%>% colnames()
ctrl.bl@meta.data%>% head(2)
| orig.ident | nCount_RNA | nFeature_RNA | donorID | sex | age | sequencing.technology | library | celltype | disease | sample_name | tissue | percent.mito | humap_fgraph_res.1 | seurat_clusters | scDblFinder.class | scDblFinder.score | humap_fgraph_res.0.8 | humap_fgraph_res.0.5 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <dbl> | <int> | <chr> | <chr> | <int> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <dbl> | <fct> | <fct> | <fct> | <dbl> | <fct> | <fct> | |
| AAACCTGAGACGCTTT-1_2_1_1 | HD_Luo_HD4 | 3669 | 1391 | HD4 | M | 45 | scRNA_VDJ5 | SRR14664412_GSM5342394 | Treg | HD | SRR14664412_GSM5342394_Treg_HD_PB4 | PB | 2.83456 | 2 | 0 | singlet | 0.006454074 | 2 | 0 |
| AAACCTGAGAGTAAGG-1_2_1_1 | HD_Luo_HD4 | 2513 | 1033 | HD4 | M | 45 | scRNA_VDJ5 | SRR14664412_GSM5342394 | Treg | HD | SRR14664412_GSM5342394_Treg_HD_PB4 | PB | 5.17310 | 2 | 0 | singlet | 0.026277108 | 2 | 0 |
cellnames <- colnames(ctrl.bl)
mt <- ctrl.bl@meta.data %>% mutate(new.names = paste0(tissue, "-", donorID, "_", gsub("_.*", "", rownames(.)))) %>% with(new.names)
ctrl.bl <- RenameCells(ctrl.bl, new.names = mt)
ctrl.bl@meta.data%>% head(2)
| orig.ident | nCount_RNA | nFeature_RNA | donorID | sex | age | sequencing.technology | library | celltype | disease | sample_name | tissue | percent.mito | humap_fgraph_res.1 | seurat_clusters | scDblFinder.class | scDblFinder.score | humap_fgraph_res.0.8 | humap_fgraph_res.0.5 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <dbl> | <int> | <chr> | <chr> | <int> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <dbl> | <fct> | <fct> | <fct> | <dbl> | <fct> | <fct> | |
| PB-HD4_AAACCTGAGACGCTTT-1 | HD_Luo_HD4 | 3669 | 1391 | HD4 | M | 45 | scRNA_VDJ5 | SRR14664412_GSM5342394 | Treg | HD | SRR14664412_GSM5342394_Treg_HD_PB4 | PB | 2.83456 | 2 | 0 | singlet | 0.006454074 | 2 | 0 |
| PB-HD4_AAACCTGAGAGTAAGG-1 | HD_Luo_HD4 | 2513 | 1033 | HD4 | M | 45 | scRNA_VDJ5 | SRR14664412_GSM5342394 | Treg | HD | SRR14664412_GSM5342394_Treg_HD_PB4 | PB | 5.17310 | 2 | 0 | singlet | 0.026277108 | 2 | 0 |
ctrl.bl@meta.data <- ctrl.bl@meta.data %>% mutate(sex = gsub("F", "Female", .$sex)) %>% mutate(sex = gsub("M", "Male", .$sex))
ctrl.bl$tissue <- "PBMC.Ctrl"
ctrl.bl$donorID <- ctrl.bl$donorID
ctrl.bl$sample.library <- ctrl.bl$sample_name
ctrl.bl$orig.ident <- "HD_Luo"
ctrl.bl@meta.data <- ctrl.bl@meta.data %>% select(c(-sequencing.technology, -library, -celltype, -disease, -sample_name, -humap_fgraph_res.1, -seurat_clusters,
-humap_fgraph_res.0.8, -humap_fgraph_res.0.5))
colnames(ctrl.bl@meta.data)
merged <- merge(amp2, c(amprep, sf, ctrl.bl))
merged
An object of class Seurat 38406 features across 39332 samples within 3 assays Active assay: RNA (38224 features, 0 variable features) 2 layers present: counts, data 2 other assays present: ADT, HTO
table(merged$orig.ident)
table(merged$tissue)
AMP2 AMPrep HD_Luo SF.BL 5368 3298 25847 4819
PBMC.Ctrl PBMC.RA SFMC Syn Syn_OA
25847 4053 1854 7229 349
Idents(merged) <- "tissue"
merged <- subset(merged, ident = "Syn_OA", invert = TRUE)
merged
An object of class Seurat 38406 features across 38983 samples within 3 assays Active assay: RNA (38224 features, 0 variable features) 2 layers present: counts, data 2 other assays present: ADT, HTO
table(merged$orig.ident)
table(merged$tissue)
AMP2 AMPrep HD_Luo SF.BL 5019 3298 25847 4819
PBMC.Ctrl PBMC.RA SFMC Syn
25847 4053 1854 7229
fig.size(5, 8)
#mito %
merged[["percent.mito"]] <- PercentageFeatureSet(merged, pattern = "^MT-")
VlnPlot(merged, features = "percent.mito", pt.size = 0.0, group.by = "orig.ident") + theme(legend.position = "none") + ggtitle(NULL) +
xlab(NULL) + ylab("% mitochondrial genes") + geom_hline(yintercept=15)
VlnPlot(merged, features = "percent.mito", pt.size = 0.0, group.by = "tissue") + theme(legend.position = "none") + ggtitle(NULL) +
xlab(NULL) + ylab("% mitochondrial genes") + geom_hline(yintercept=15)
fig.size(5, 20)
VlnPlot(merged, features = "percent.mito", pt.size = 0.0, group.by = "donorID") + theme(legend.position = "none") + ggtitle(NULL) +
xlab(NULL) + ylab("% mitochondrial genes") + geom_hline(yintercept=15)
#genes num
fig.size(5, 8)
VlnPlot(merged, features = "nFeature_RNA", pt.size = 0.0, group.by = "orig.ident") + theme(legend.position = "none") + ggtitle(NULL) +
xlab(NULL) + ylab("genes number")
VlnPlot(merged, features = "nFeature_RNA", pt.size = 0.0, group.by = "tissue") + theme(legend.position = "none") + ggtitle(NULL) +
xlab(NULL) + ylab("genes number")
fig.size(5, 20)
VlnPlot(merged, features = "nFeature_RNA", pt.size = 0.0, group.by = "donorID") + theme(legend.position = "none") + ggtitle(NULL) +
xlab(NULL) + ylab("genes number")
#counts num
fig.size(5, 8)
VlnPlot(merged, features = "nCount_RNA", pt.size = 0.0, group.by = "orig.ident") + theme(legend.position = "none") + ggtitle(NULL) +
xlab(NULL) + ylab("counts number")
VlnPlot(merged, features = "nCount_RNA", pt.size = 0.0, group.by = "tissue") + theme(legend.position = "none") + ggtitle(NULL) +
xlab(NULL) + ylab("counts number")
fig.size(5, 20)
VlnPlot(merged, features = "nCount_RNA", pt.size = 0.0, group.by = "donorID") + theme(legend.position = "none") + ggtitle(NULL) +
xlab(NULL) + ylab("counts number")
merged@meta.data%>% head(2)
| orig.ident | nCount_RNA | nFeature_RNA | sample | cluster_number | cluster_name | nCount_ADT | nFeature_ADT | age | sex | ⋯ | nCount_HTO | nFeature_HTO | HTO_maxID | HTO_secondID | HTO_margin | HTO_classification | HTO_classification.global | hash.ID | scDblFinder.class | scDblFinder.score | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <dbl> | <int> | <chr> | <chr> | <chr> | <dbl> | <int> | <int> | <chr> | ⋯ | <dbl> | <int> | <chr> | <chr> | <dbl> | <chr> | <chr> | <chr> | <chr> | <dbl> | |
| BRI-401_AAACGCTCAATACCCA | AMP2 | 4980 | 1693 | BRI-401 | T-8 | T-8: CD4+ CD25-high Treg | 1290 | 46 | 77 | Female | ⋯ | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
| BRI-401_AAAGGATCACACACGC | AMP2 | 5435 | 1869 | BRI-401 | T-8 | T-8: CD4+ CD25-high Treg | 352 | 43 | 77 | Female | ⋯ | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA |
merged <- merged %>%
NormalizeData(normalization.method = "LogNormalize", scale.factor = 10000) %>%
FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
ScaleData() %>%
RunBalancedPCA(weight.by = "tissue", npcs = 50)
# RunPCA(verbose = T)
Centering and scaling data matrix
# I don't re-normalize/scale protein data as not all datasets has this information!
# merged <- merged %>%
# NormalizeData(normalization.method = "CLR", margin = 2, assay = "ADT") #%>%
# # ScaleData(assay = "ADT")
fig.size(4,4)
DimPlot(merged, reduction = "pca", group.by = "orig.ident",shuffle = T)
DimPlot(merged, reduction = "pca", group.by = "tissue",shuffle = T)
fig.size(4,4)
DimPlot(merged, reduction = "pca", group.by = "donorID",shuffle = T, label = F) + theme(legend.position = "none")
fig.size(4,5)
DimPlot(merged, reduction = "pca", group.by = "orig.ident",shuffle = T, dims = c(1,3))
DimPlot(merged, reduction = "pca", group.by = "tissue",shuffle = T, dims = c(1,3))
fig.size(4,4)
DimPlot(merged, reduction = "pca", group.by = "donorID",shuffle = T, label = F, dims = c(1,3)) + theme(legend.position = "none")
fig.size(4,4)
DimPlot(merged, reduction = "pca", group.by = "orig.ident",shuffle = T, dims = c(1,4))
DimPlot(merged, reduction = "pca", group.by = "tissue",shuffle = T, dims = c(1,4))
fig.size(4,20)
DimPlot(merged, reduction = "pca", group.by = "tissue", shuffle = T, label = F, split.by = "orig.ident")
barplot(table(merged$donorID))
# compare the duplicated donors (from datasets AMP2 and AMPrep, to look for the variable of variation
dup.donor <- merged@meta.data%>% select(donorID, orig.ident)%>% unique() %>% arrange(donorID)%>% count(donorID)%>% filter(n>1)%>% .$donorID
fig.size(5,5)
DimPlot(merged, reduction = "pca", cells = colnames(merged)[merged$donorID %in% dup.donor], group.by = "donorID",shuffle = T, label = F) + theme(legend.position = "none")
DimPlot(merged, reduction = "pca", cells = colnames(merged)[merged$donorID %in% dup.donor], group.by = "orig.ident",shuffle = T, label = F)
DimPlot(merged, reduction = "pca", cells = colnames(merged)[merged$donorID %in% dup.donor], group.by = "donorID",shuffle = T, label = F, dims = 2:3) + theme(legend.position = "none")
DimPlot(merged, reduction = "pca", cells = colnames(merged)[merged$donorID %in% dup.donor], group.by = "orig.ident",shuffle = T, label = F, dims = 2:3)
Looks like: PC1 separates experiment PC2 separates tissue PC3 separates experiment, too
merged <- RunHarmony(merged, group.by.vars=c("orig.ident", "donorID", "tissue"),
reduction.use = "pca",
plot_convergence = TRUE,
early_stop = F)
fig.size(5,5)
ElbowPlot(merged, ndims = 50, reduction = "harmony")
ElbowPlot(merged, ndims = 50, reduction = "pca")
Transposing data matrix Initializing state using k-means centroids initialization Harmony 1/10 Harmony 2/10 Harmony 3/10 Harmony 4/10 Harmony 5/10 Harmony 6/10 Harmony 7/10 Harmony 8/10 Harmony 9/10 Harmony 10/10
fig.size(5,5)
DimPlot(merged, reduction = "harmony", group.by = "orig.ident",shuffle = T)
DimPlot(merged, reduction = "harmony", group.by = "tissue",shuffle = T)
fig.size(5,15)
DimPlot(merged, reduction = "harmony", group.by = "donorID",shuffle = T, label = F)
# run umap
set.seed(1)
merged <- Run_uwot_umap(merged, min_dist = 0.5, spread = 1.5)
merged <- FindClusters(merged, graph.name = 'humap_fgraph',
resolution = 0.8, verbose = TRUE)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck Number of nodes: 38983 Number of edges: 474203 Running Louvain algorithm... Maximum modularity in 10 random starts: 0.6913 Number of communities: 10 Elapsed time: 3 seconds
fig.size(5, 6)
red.use <- "humap"
DimPlot(merged, reduction = red.use, label = T, repel = T, shuffle = T, label.size = 6) +
theme(text = element_text(size = 20)) + scale_color_tableau("Tableau 20")
DimPlot(merged, reduction = red.use,group.by = "tissue", label = T, repel = T, shuffle = T, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(merged, reduction = red.use,group.by = "orig.ident", label = F, repel = T, shuffle = T, label.size = 6) + theme(text = element_text(size = 20))
fig.size(6, 6)
DimPlot(merged, reduction = red.use,group.by = "sex", label = F, repel = T, shuffle = T, label.size = 6) + theme(text = element_text(size = 20))
fig.size(8, 8)
DimPlot(merged, reduction = red.use,group.by = "tissue", label = T, repel = T, shuffle = T, pt.size = 0.1, label.size = 6) +
theme(text = element_text(size = 20)) + scale_color_tableau("Tableau 10")
fig.size(12,12)
FeaturePlot(merged, reduction = red.use,features = c("FOXP3", "IL2RA", "AREG", "CXCR6", "MKI67", "CD4", "CD8A", "CCR7", "TIGIT"))
fig.size(5, 20)
DimPlot(merged, reduction = red.use, split.by = "tissue", group.by = "tissue", repel = T, shuffle = T, pt.size = 0.1, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(merged, reduction = red.use, split.by = "orig.ident", group.by = "orig.ident",repel = T, shuffle = T, pt.size = 0.1, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(merged, reduction = red.use, split.by = "tissue", group.by = "seurat_clusters", repel = T, shuffle = T, pt.size = 0.1, label.size = 6) +
theme(text = element_text(size = 20)) + scale_color_tableau("Tableau 20")
fig.size(8,6)
VlnPlot(merged, features = c("FOXP3", "IL2RA", "AREG", "CXCR6"), group.by = "tissue", ncol = 1, pt.size = 0.00)
table(merged$seurat_clusters)
table(merged$seurat_clusters, merged$orig.ident)
table(merged$seurat_clusters, merged$tissue)
0 1 2 3 4 5 6 7 8 9 8982 8703 6670 5993 3482 2517 1175 795 400 266
AMP2 AMPrep HD_Luo SF.BL
0 337 415 7346 884
1 1576 840 5423 864
2 766 492 4376 1036
3 664 492 4088 749
4 336 309 2278 559
5 932 263 1095 227
6 322 404 252 197
7 26 50 611 108
8 60 27 213 100
9 0 6 165 95
PBMC.Ctrl PBMC.RA SFMC Syn
0 7346 1021 117 498
1 5423 893 212 2175
2 4376 743 461 1090
3 4088 649 279 977
4 2278 246 394 564
5 1095 212 81 1129
6 252 81 172 670
7 611 95 38 51
8 213 46 70 71
9 165 67 30 4
fig.size(4,10)
VlnPlot(merged, features = "FOXP3", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00) +
scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "IL2RA", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00) +
scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "AREG", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00) +
scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "CXCR6", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00) +
scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "FOXP3", group.by = "seurat_clusters", ncol = 1, pt.size = 0) +
scale_fill_tableau("Tableau 20")
median(table(merged$orig.ident)[c(1,2,4)])
max(table(merged$orig.ident)[c(1,2,4)])
median(table(merged$tissue)[2:4])
max(table(merged$tissue)[2:4])
n <- max(table(merged$tissue)[2:4])
n
set.seed(1)
ctrl.bl.subsampled <- ctrl.bl[, sample(colnames(ctrl.bl), size = n, replace=F)]
merged <- merge(amp2, c(amprep, sf, ctrl.bl.subsampled))
merged
table(merged$tissue)
table(merged$orig.ident)
Idents(merged) <- "tissue"
merged <- subset(merged, ident = "Syn_OA", invert = TRUE)
merged
An object of class Seurat 38406 features across 20714 samples within 3 assays Active assay: RNA (38224 features, 0 variable features) 2 layers present: counts, data 2 other assays present: ADT, HTO
PBMC.Ctrl PBMC.RA SFMC Syn Syn_OA
7229 4053 1854 7229 349
AMP2 AMPrep HD_Luo SF.BL 5368 3298 7229 4819
An object of class Seurat 38406 features across 20365 samples within 3 assays Active assay: RNA (38224 features, 0 variable features) 2 layers present: counts, data 2 other assays present: ADT, HTO
merged <- merged %>%
NormalizeData(normalization.method = "LogNormalize", scale.factor = 10000) %>%
FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
ScaleData() %>%
# RunBalancedPCA(weight.by = "tissue")
RunPCA(verbose = T)
Centering and scaling data matrix PC_ 1 Positive: RBFOX2, APOO, FP236383.3, LTB, MT-ATP8, OOEP, STEAP1B, SLC35F1, RPS10-NUDT3, PTPRCAP BAIAP2L1, FP671120.4, MALAT1, AC004448.2, PLAC8, RPLP0, CEACAM4, FXYD2, MT-ND3, AL136456.1 FCER1G, TEX14, AIF1, SNED1, ANK2, TRAV8-2, CRIP2, FAAH2, AQP3, TRBV5-1 Negative: TYMS, MKI67, RRM2, PCLAF, UBE2C, BIRC5, TOP2A, CDK1, NUSAP1, TK1 CCNA2, KIFC1, CDT1, PKMYT1, TPX2, ASF1B, CLSPN, CENPF, AURKB, CDCA5 ZWINT, GTSE1, UHRF1, MYBL2, ASPM, STMN1, SPC25, DLGAP5, HIST1H1B, CENPW PC_ 2 Positive: JUND, IGKC, AES, MT-ATP6, MT-ND4, RPS10, SEPT7, SEPT6, KIAA1551, MALAT1 FOSB, XIST, MT-ND1, MT-ND2, RARRES3, MT-CO3, IGLC2, NEAT1, FOS, TRAC POLR2J3.1, MINOS1, C6orf48, SEPT9, CREM, FAM96B, IGHG3, ZNF331, FAM129A, SOCS3 Negative: MT-ATP8, APOO, RBFOX2, PTPRCAP, RPLP0, NME2, FP236383.3, MIF, PFN1, PPIA OOEP, STEAP1B, SLC35F1, CNN2, RPS10-NUDT3, ACTB, AC004448.2, BAIAP2L1, STMN1, TUBB RRM2, FP671120.4, HMGN2, UBE2C, MKI67, TYMS, PKMYT1, CORO1A, CCNA2, PCLAF PC_ 3 Positive: HLA-A, IL32, CD74, CRIP1, PFN1, S100A4, GAPDH, ACTB, LGALS3, FOXP3 HLA-DRB5, ARPC1B, HLA-B, MYL6, LINC01943, ANXA2, TMSB10, S100A10, MT1E, CTSC MT2A, MIF, LINC01578, TAGLN2, UCP2, S100A6, LY6E, TNFRSF18, HLA-DRB1, MT1X Negative: MT-ND3, MT-ND1, MT-ND4, RPS10, IGKC, MT-CYB, MT-CO3, AES, MALAT1, MT-CO2 MT-ND2, C6orf48, KIAA1551, SEPT6, UBE2C, RARRES3, SEPT7, IGLC2, TOP2A, GTSE1 CCNA2, BIRC5, MKI67, RRM2, DLGAP5, AURKB, POLR2J3.1, IGHG3, ASPM, KIFC1 PC_ 4 Positive: JUNB, NR4A2, DUSP1, ZFP36, FOS, CSRNP1, DUSP2, JUN, ZNF331, PPP1R15A TNFAIP3, SLC2A3, NR4A3, DNAJB1, FOSB, CDKN1A, CD69, RGCC, NR4A1, CXCR4 GEM, YPEL5, NFKBIA, KLF6, BTG2, PTGER4, AREG, TUBB4B, SERTAD1, CREM Negative: MT-CO3, GNAI2, AES, SEPT7, SEPT6, GNAS, FAM129A, TMSB4X, KIAA1551, TRAC MBD2, POLR2J3.1, MT-CO2, MINOS1, CD81, FAM96B, SEPT1, MALAT1, HNRNPD, MZT2B UBALD2, SEPT9, C8orf59, RPS10, C19orf70, MT-ND4, C15orf53, FAM89B, SEPT2, MT-CYB PC_ 5 Positive: GINS2, MCM2, E2F1, CDC45, CDC6, MCM7, UHRF1, DTL, MCM4, CDT1 MCM5, TYMS, CLSPN, PAQR4, TK1, PCNA, FEN1, MCM6, GZMH, NKG7 PCLAF, MCM10, HELLS, FAM111B, CENPU, MCM3, CHEK1, UNG, DHFR, MATK Negative: PLK1, CDC20, DLGAP5, ASPM, KIF14, CENPA, HMMR, CCNB1, UBE2C, TROAP CDCA3, CENPE, CCNB2, GTSE1, CENPF, CDCA8, KIF20A, BIRC5, AURKB, TOP2A KIF23, TPX2, KIF2C, CDCA2, PRR11, MTRNR2L12, UBALD2, CKAP2L, AURKA, MXD3
fig.size(5,5)
merged <- RunHarmony(merged, group.by.vars=c("orig.ident", "donorID", "tissue"),
reduction.use = "pca",
plot_convergence = TRUE,
early_stop = T)
# ElbowPlot(merged, ndims = 50, reduction = "harmony")
# ElbowPlot(merged, ndims = 50, reduction = "pca")
Transposing data matrix Initializing state using k-means centroids initialization Harmony 1/10 Harmony 2/10 Harmony 3/10 Harmony converged after 3 iterations
set.seed(1)
merged <- Run_uwot_umap(merged, min_dist = 0.5, spread = 1.5)
merged <- FindClusters(merged, graph.name = 'humap_fgraph',
resolution = 0.8, verbose = TRUE)
fig.size(5, 6)
# clustercols <- c(colorRampPalette(brewer.pal(8, "Set2"))(13)[1:10], colorRampPalette(brewer.pal(8, "Set1"))(13)[1:9], colorRampPalette(brewer.pal(8, "Set3"))(13)[1:9])
red.use <- "humap"
DimPlot(merged, reduction = red.use, label = T, repel = T, shuffle = T, label.size = 6) +
theme(text = element_text(size = 20)) + scale_color_tableau("Tableau 20")
DimPlot(merged, reduction = red.use,group.by = "tissue", label = T, repel = T, shuffle = T, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(merged, reduction = red.use,group.by = "orig.ident", label = F, repel = T, shuffle = T, label.size = 6) + theme(text = element_text(size = 20))
fig.size(6, 6)
DimPlot(merged, reduction = red.use,group.by = "sex", label = F, repel = T, shuffle = T, label.size = 6) + theme(text = element_text(size = 20))
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck Number of nodes: 20365 Number of edges: 251239 Running Louvain algorithm... Maximum modularity in 10 random starts: 0.7047 Number of communities: 9 Elapsed time: 1 seconds
fig.size(8, 8)
DimPlot(merged, reduction = red.use,group.by = "tissue", label = T, repel = T, shuffle = T, pt.size = 0.1, label.size = 6) +
theme(text = element_text(size = 20)) + scale_color_tableau("Tableau 10")
fig.size(12,12)
FeaturePlot(merged, reduction = red.use,features = c("FOXP3", "IL2RA", "AREG", "CXCR6", "MKI67", "CD4", "CD8A", "CCR7", "TIGIT"))
fig.size(5, 20)
DimPlot(merged, reduction = red.use, split.by = "tissue", group.by = "tissue", repel = T, shuffle = T, pt.size = 0.1, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(merged, reduction = red.use, split.by = "orig.ident", group.by = "orig.ident",repel = T, shuffle = T, pt.size = 0.1, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(merged, reduction = red.use, split.by = "tissue", group.by = "seurat_clusters", repel = T, shuffle = T, pt.size = 0.1, label.size = 6) +
theme(text = element_text(size = 20)) + scale_color_tableau("Tableau 20")
fig.size(4,10)
VlnPlot(merged, features = "FOXP3", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00000001) +
scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "IL2RA", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00000001) +
scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "AREG", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00000001) +
scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "CXCR6", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00000001) +
scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "FOXP3", group.by = "seurat_clusters", ncol = 1, pt.size = 0) +
scale_fill_tableau("Tableau 20")
Idents(merged) <- "seurat_clusters"
Tregs.markers <- wilcoxauc(merged, "seurat_clusters")
options(repr.matrix.max.cols=150, repr.matrix.max.rows=200)
m.show <- Tregs.markers %>%
group_by(group) %>% filter(padj <0.05 & auc > 0.6) %>%
arrange(group, desc(logFC)) %>%
#slice_max(n = 30, order_by = logFC)%>%
select(feature, group) %>%
mutate(row = row_number()) %>%
tidyr::pivot_wider(names_from = group, values_from = feature)
# m.show %>% write.csv(paste0(save.path, "integrated.cluster.markers.csv"))
m.show[1:25,]
| row | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
|---|---|---|---|---|---|---|---|---|---|
| <int> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
| 1 | GAS5 | CD74 | RPL17 | FOS | CD74 | STMN1 | MT-ATP6 | TRBV7-6 | ISG15 |
| 2 | EEF1G | IL32 | TXNIP | JUNB | SRGN | TUBA1B | MT-ND4 | TRBV7-4 | IFI6 |
| 3 | MT-ATP8 | S100A4 | IFITM1 | JUND | S100A4 | TUBB | MT-ND2 | TRBV7-9 | MX1 |
| 4 | APOO | HLA-DRB1 | EEF1G | JUN | MT2A | HMGB2 | MALAT1 | TRBV7-2 | MT2A |
| 5 | SNHG29 | HLA-DRB5 | RBFOX2 | FOSB | CD2 | TYMS | IGKC | IFITM1 | LY6E |
| 6 | EEF1B2 | HLA-DPA1 | TLE5 | NR4A2 | TNFRSF18 | CD74 | MT-ND1 | EEF1G | OAS1 |
| 7 | RPL17 | HLA-DPB1 | PTPRCAP | ZNF331 | GAPDH | GZMA | MT-ND3 | RPL17 | STAT1 |
| 8 | TCF7 | LGALS1 | MT-ND4L | DUSP1 | S100A11 | CCL5 | MT-CO3 | TRBV7-5 | ISG20 |
| 9 | RPS12 | CRIP1 | RPL36A | MT-ATP6 | LGALS1 | GAPDH | NEAT1 | TLE5 | XAF1 |
| 10 | RPS13 | ANXA2 | RPL8 | RPS10 | MYL6 | HMGB1 | MT-ND5 | TXNIP | OASL |
| 11 | RBFOX2 | FOXP3 | GSTK1 | DUSP2 | CRIP1 | MKI67 | MT-CYB | RPS4Y1 | EIF2AK2 |
| 12 | FP236383.3 | CYTOR | LDHB | IGKC | COTL1 | H2AFZ | XIST | GAS5 | HERC5 |
| 13 | RPL32 | CLIC1 | EEF1A1 | YPEL5 | TNFRSF4 | MT2A | MT-CO1 | NME2 | IFIT3 |
| 14 | SNHG6 | LGALS3 | RPS8 | CREM | ARPC1B | HLA-DRA | MT-CO2 | EEF2 | IFIT1 |
| 15 | LEF1 | SH3BGRL3 | RPL34 | BTG2 | LGALS3 | COTL1 | SYNE2 | FXYD5 | EPSTI1 |
| 16 | TXNIP | GBP5 | RPL30 | MT-ND4 | LAG3 | HMGN2 | RNF213 | RESF1 | RNF213 |
| 17 | MT-ND4L | S100A10 | RPS27 | CD69 | CLIC1 | DUT | KIAA1551 | LINC01578 | TRIM22 |
| 18 | RPS5 | HLA-A | EEF2 | DNAJA1 | KLRB1 | NKG7 | TRAC | PLAAT4 | MX2 |
| 19 | CCR7 | S100A6 | RPS21 | PPP1R15A | PKM | DEK | N4BP2L2 | IL2RG | SAT1 |
| 20 | PRKCQ-AS1 | CTSC | RPS12 | SRGN | ENO1 | HLA-DRB1 | POLR2J3.1 | SEPTIN1 | IFITM1 |
| 21 | RPL30 | LSP1 | RPL32 | CXCR4 | UCP2 | HIST1H4C | NKTR | PFN1 | CD74 |
| 22 | RPS8 | HLA-DRA | RPS3 | RGS1 | TNFRSF1B | LGALS1 | PCSK7 | EEF1A1 | SP110 |
| 23 | RPL36A | ARPC1B | RPL39 | LMNA | CTSC | PCLAF | DDX17 | NA | SP100 |
| 24 | RPL5 | ISG20 | RPS15A | TNFAIP3 | IL2RG | H2AFV | SEPT6 | NA | IFI44L |
| 25 | GIMAP7 | MYL6 | RPL10 | SOCS3 | CTLA4 | TPI1 | IGHM | NA | IRF7 |
#Eexplore TRBV7 genes
trbv7 <- rownames(merged@assays$RNA@scale.data)[grepl("TRBV7", rownames(merged@assays$RNA@scale.data))]
trbv <- rownames(merged@assays$RNA@scale.data)[grepl("TRBV", rownames(merged@assays$RNA@scale.data))]
trav <- rownames(merged@assays$RNA@scale.data)[grepl("TRAV", rownames(merged@assays$RNA@scale.data))]
trbv7
trbv
trav
fig.size(10, 10)
FeaturePlot(merged, features = trbv7, ncol = 3)
fig.size(10, 10)
dat.plot <- FetchData(merged, vars = c(trbv), slot = "scale.data")
dat.meta <- FetchData(merged, vars = c("seurat_clusters", "tissue", "orig.ident"), slot = "data")
dat.meta <- dat.meta%>% arrange(seurat_clusters, tissue, orig.ident)
pheatmap(dat.plot[rownames(dat.meta),], scale = "none", cluster_rows = F, cluster_cols = T, show_rownames = F,
annotation_row = dat.meta)
Warning message:
“The `slot` argument of `FetchData()` is deprecated as of SeuratObject 5.0.0.
ℹ Please use the `layer` argument instead.”
fig.size(10, 10)
dat.plot <- FetchData(merged, vars = c(trbv7), cells = rownames(merged@meta.data)[merged$seurat_clusters == 7], slot = "scale.data")
dat.meta <- FetchData(merged, , cells = rownames(merged@meta.data)[merged$seurat_clusters == 7], vars = c("seurat_clusters", "tissue", "orig.ident"), slot = "data")
dat.meta <- dat.meta%>% arrange(seurat_clusters, tissue, orig.ident)
pheatmap(dat.plot[rownames(dat.meta),], scale = "none", cluster_rows = F, cluster_cols = T, show_rownames = F,
annotation_row = dat.meta)
table(merged$seurat_clusters, merged$orig.ident)
table(merged$seurat_clusters, merged$tissue)
# table(merged$seurat_clusters[merged$seurat_clusters == 8], merged$donorID[merged$seurat_clusters == 8], merged$tissue[merged$seurat_clusters == 8])
AMP2 AMPrep HD_Luo SF.BL
0 473 523 2967 1223
1 792 573 1279 1334
2 388 471 2107 993
3 2374 949 216 198
4 354 285 404 579
5 286 400 45 179
6 290 49 73 74
7 0 31 98 141
8 62 17 40 98
PBMC.Ctrl PBMC.RA SFMC Syn
0 2967 1537 65 617
1 1279 870 671 1158
2 2107 1062 174 616
3 216 127 160 3234
4 404 172 477 569
5 45 78 160 627
6 73 64 29 320
7 98 106 48 18
8 40 37 70 70
Idents(merged) <- "seurat_clusters"
merged <- FindSubCluster(merged, cluster = 3, resolution = 0.3, graph.name = "humap_fgraph")
# Idents(merged) <- "sub.cluster"
# merged <- FindSubCluster(merged, cluster = 4, resolution = 0.3, graph.name = "humap_fgraph")
Idents(merged) <- "sub.cluster"
merged <- FindSubCluster(merged, cluster = 5, resolution = 0.3, graph.name = "humap_fgraph")
fig.size(10,10)
DimPlot(merged, reduction = red.use, group.by = "sub.cluster", label = T, repel = T, shuffle = T, pt.size = 0.5, label.size = 6) +
theme(text = element_text(size = 20)) +
scale_color_tableau("Tableau 20")
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck Number of nodes: 3737 Number of edges: 37545 Running Louvain algorithm... Maximum modularity in 10 random starts: 0.7440 Number of communities: 3 Elapsed time: 0 seconds Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck Number of nodes: 910 Number of edges: 9917 Running Louvain algorithm... Maximum modularity in 10 random starts: 0.8293 Number of communities: 4 Elapsed time: 0 seconds
fig.size(15,15)
VlnPlot(merged, features = c("FOXP3", "IL2RA", "AREG", "CXCR6", "CD8A", "MKI67", "TCF7"), group.by = "sub.cluster", ncol = 1, pt.size = 0.00000001)
table(merged$sub.cluster)
# table(merged$sub.cluster, merged$orig.ident)
0 1 2 3_0 3_1 3_2 4 5_0 5_1 5_2 5_3 6 7 8 5186 3978 3959 1835 1319 583 1622 382 210 198 120 486 270 217
fig.size(5,20)
DimPlot(merged, reduction = red.use, split.by = "tissue", group.by = "sub.cluster", repel = T, shuffle = T, pt.size = 0.1, label.size = 6) +
theme(text = element_text(size = 20)) + scale_color_tableau("Tableau 20")
DimPlot(merged, reduction = red.use, split.by = "orig.ident", group.by = "sub.cluster", repel = T, shuffle = T, pt.size = 0.1, label.size = 6) +
theme(text = element_text(size = 20)) + scale_color_tableau("Tableau 20")
fig.size(5,5)
#mito %
VlnPlot(merged, features = "percent.mito", pt.size = 0.0, group.by = "sub.cluster") + theme(legend.position = "none") + ggtitle(NULL) +
xlab(NULL) + ylab("% mitochondrial genes") + geom_hline(yintercept=15)
5_2, 5_0 - non FOXP3 proliferating 5_3 - CD8+ proliferating 7 - contamination clusters from VDJ sequencing - an artifact coming from only the datasets that were generated by VDJ sequencing, and expressing high levels of TRBV7 genes. Therefore is removed. 6 - high mt
Idents(merged) <- "sub.cluster"
merged <- subset(merged, idents = c("5_0", "5_2", "5_3", 6, 7), invert = T)
merged
An object of class Seurat 38406 features across 18909 samples within 3 assays Active assay: RNA (38224 features, 2000 variable features) 3 layers present: counts, data, scale.data 2 other assays present: ADT, HTO 3 dimensional reductions calculated: pca, harmony, humap
merged <- NormalizeData(merged, normalization.method = "LogNormalize", scale.factor = 10000) %>%
FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
ScaleData() %>%
# RunBalancedPCA(weight.by = "tissue", npc = 50)
RunPCA(verbose = F)
Centering and scaling data matrix
# merged <- NormalizeData(merged, normalization.method = "CLR", margin = 2, assay = "ADT") %>%
# ScaleData(assay = "ADT")
fig.size(5,5)
DimPlot(merged, reduction = "pca", group.by = "orig.ident",shuffle = T)
DimPlot(merged, reduction = "pca", group.by = "tissue",shuffle = T)
fig.size(5,15)
DimPlot(merged, reduction = "pca", group.by = "donorID",shuffle = T, label = F)
fig.size(5,5)
merged <- RunHarmony(merged, group.by.vars=c("orig.ident", "donorID", "tissue"), #theta = c(2,2,1),
reduction.use = "pca",
plot_convergence = TRUE,
early_stop = T)
ElbowPlot(merged, ndims = 50, reduction = "harmony")
ElbowPlot(merged, ndims = 50, reduction = "pca")
Transposing data matrix Initializing state using k-means centroids initialization Harmony 1/10 Harmony 2/10 Harmony 3/10 Harmony converged after 3 iterations
# Run UMAP
set.seed(1)
merged <- Run_uwot_umap(merged, min_dist = 0.5, spread = 1.5)
merged <- FindClusters(merged, graph.name = 'humap_fgraph',
resolution = 0.8, verbose = TRUE)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck Number of nodes: 18909 Number of edges: 233331 Running Louvain algorithm... Maximum modularity in 10 random starts: 0.7140 Number of communities: 10 Elapsed time: 1 seconds
fig.size(6,7)
Idents(merged) <- "seurat_clusters"
DimPlot(merged, label = T, repel = T, shuffle = T, label.size = 6) +
scale_color_tableau("Tableau 20") + theme(text = element_text(size = 20))
DimPlot(merged, group.by = "tissue", label = T, repel = T, shuffle = T, pt.size = 0.5, label.size = 6) +
theme(text = element_text(size = 20)) + scale_fill_tableau("Tableau 20")
DimPlot(merged, group.by = "orig.ident", label = F, repel = T, shuffle = T,pt.size = 0.5, label.size = 6) +
theme(text = element_text(size = 20))
fig.size(5,15)
Idents(merged) <- "seurat_clusters"
DimPlot(merged, label = T, repel = T, shuffle = T, label.size = 6, split.by = "tissue") +
scale_color_tableau("Tableau 20") + theme(text = element_text(size = 20))
fig.size(6,7)
FeaturePlot(merged, features = "FOXP3", order = F)
FeaturePlot(merged, features = "IL2RA", order = F)
FeaturePlot(merged, features = "AREG", order = F)
FeaturePlot(merged, features = "CXCR6", order = F)
FeaturePlot(merged, features = "TCF7", order = F)
FeaturePlot(merged, features = "CCR7", order = F)
fig.size(5, 20)
FeaturePlot(merged, features = "AREG", order = T, split.by = "tissue")
FeaturePlot(merged, features = "AREG", order = F, split.by = "tissue")
FeaturePlot(merged, features = "CXCR6", order = F, split.by = "tissue")
FeaturePlot(merged, features = "CCR7", order = F, split.by = "tissue")
FeaturePlot(merged, features = "TCF7", order = F, split.by = "tissue")
fig.size(5, 20)
DimPlot(merged, split.by = "tissue", group.by = "tissue", repel = T, shuffle = T, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(merged, split.by = "orig.ident", group.by = "orig.ident",repel = T, shuffle = T, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(merged, split.by = "tissue", group.by = "seurat_clusters",repel = T, shuffle = T, label.size = 6) +
theme(text = element_text(size = 20)) + scale_color_tableau("Tableau 20")
fig.size(12, 18)
umap.plot <- FetchData(merged, vars = c("HUMAP_1", "HUMAP_2", "seurat_clusters"))
umap.bg <- umap.plot %>% select(-seurat_clusters)
ggplot(umap.plot, aes(x = HUMAP_1, y = HUMAP_2)) + geom_point(data = umap.bg, color = "grey", alpha = 0.2, size = 0.01) +
geom_point(aes(color = seurat_clusters), size = 0.01) +
facet_wrap(~seurat_clusters) +
scale_color_tableau("Tableau 20") + theme_bw(base_size = 18)
# DimPlot(merged, split.by = "seurat_clusters", group.by = "seurat_clusters",repel = T, shuffle = T, label.size = 6, ncol =5) +
# theme(text = element_text(size = 20)) + scale_color_tableau("Tableau 20")
fig.size(10, 12)
umap.plot <- FetchData(merged, vars = c("HUMAP_1", "HUMAP_2", "tissue"))
umap.bg <- umap.plot %>% select(-tissue)
ggplot(umap.plot, aes(x = HUMAP_1, y = HUMAP_2)) + geom_point(data = umap.bg, color = "grey", size = 0.1) +
geom_point(color = "black", size = 0.1) +
facet_wrap(~tissue) +
scale_color_tableau("Tableau 20") + theme_bw(base_size = 24)
# DimPlot(merged, split.by = "seurat_clusters", group.by = "seurat_clusters",repel = T, shuffle = T, label.size = 6, ncol =5) +
# theme(text = element_text(size = 20)) + scale_color_tableau("Tableau 20")
fig.size(3,8)
VlnPlot(merged, features = "FOXP3", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00) +
scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "IL2RA", group.by = "seurat_clusters", ncol = 1, pt.size = 0.0) +
scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "TCF7", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00) +
scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "CCR7", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00) +
scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "CXCR6", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00) +
scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "CD8A", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00) +
scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "CD8B", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00) +
scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "AREG", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00) +
scale_fill_tableau("Tableau 20")
fig.size(3, 12)
VlnPlot(merged, features = "AREG", group.by = "seurat_clusters", split.by = "tissue", ncol = 1, pt.size = 0.00) +
scale_fill_tableau("Tableau 20")
The default behaviour of split.by has changed.
Separate violin plots are now plotted side-by-side.
To restore the old behaviour of a single split violin,
set split.plot = TRUE.
This message will be shown once per session.
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
Idents(merged) <- "seurat_clusters"
merged <- FindSubCluster(merged, cluster = 3, resolution = 0.3, graph.name = "humap_fgraph")
# merged$seurat_clusters[merged$seurat_clusters == 7] <- 1 # cluster 7 is 4 cells in total.
# Idents(merged) <- "sub.cluster"
# merged <- FindSubCluster(merged, cluster = 1, resolution = 0.3, graph.name = "humap_fgraph")
# Idents(merged) <- "sub.cluster"
# merged <- FindSubCluster(merged, cluster = 5, resolution = 0.3, graph.name = "humap_fgraph")
fig.size(10,10)
DimPlot(merged, reduction = red.use, group.by = "sub.cluster", label = T, repel = T, shuffle = T, pt.size = 0.5, label.size = 6) +
theme(text = element_text(size = 20)) +
scale_color_tableau("Tableau 20")
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck Number of nodes: 2972 Number of edges: 30148 Running Louvain algorithm... Maximum modularity in 10 random starts: 0.7555 Number of communities: 3 Elapsed time: 0 seconds
fig.size(12, 18)
umap.plot <- FetchData(merged, vars = c("HUMAP_1", "HUMAP_2", "sub.cluster"))
umap.bg <- umap.plot %>% select(-sub.cluster)
ggplot(umap.plot, aes(x = HUMAP_1, y = HUMAP_2)) + geom_point(data = umap.bg, color = "grey", alpha = 0.2, size = 0.01) +
geom_point(aes(color = sub.cluster), size = 0.01) +
facet_wrap(~sub.cluster) +
scale_color_tableau("Tableau 20") + theme_bw(base_size = 18)
fig.size(3,8)
VlnPlot(merged, features = "FOXP3", group.by = "sub.cluster", ncol = 1, pt.size = 0.00) +
scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "IL2RA", group.by = "sub.cluster", ncol = 1, pt.size = 0.0) +
scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "TCF7", group.by = "sub.cluster", ncol = 1, pt.size = 0.00) +
scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "CCR7", group.by = "sub.cluster", ncol = 1, pt.size = 0.00) +
scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "CXCR6", group.by = "sub.cluster", ncol = 1, pt.size = 0.00) +
scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "AREG", group.by = "sub.cluster", ncol = 1, pt.size = 0.00) +
scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "MKI67", group.by = "sub.cluster", ncol = 1, pt.size = 0.00) +
scale_fill_tableau("Tableau 20")
table(merged$sub.cluster, merged$orig.ident)
table(merged$sub.cluster, merged$tissue)
AMP2 AMPrep HD_Luo SF.BL
0 287 487 2911 1144
1 338 470 1500 1171
2 144 371 1746 813
3_0 1125 408 91 40
3_1 815 280 16 62
3_2 116 10 2 7
4 661 323 93 668
5 827 321 37 120
6 60 104 492 221
7 60 18 49 120
8 66 85 35 50
9 13 22 54 56
PBMC.Ctrl PBMC.RA SFMC Syn
0 2911 1464 50 404
1 1500 958 416 605
2 1746 903 118 307
3_0 91 61 30 1482
3_1 16 19 50 1088
3_2 2 3 4 126
4 93 82 615 955
5 37 36 117 1115
6 492 172 112 101
7 49 41 90 67
8 35 15 46 140
9 54 58 15 18
fig.size(5, 20)
FeaturePlot(merged, "FOXP3", split.by = "tissue", pt.size = 0.01)
FeaturePlot(merged, "IL2RA", split.by = "tissue", pt.size = 0.01)
FeaturePlot(merged, "AREG", split.by = "tissue", pt.size = 0.01)
FeaturePlot(merged, "CXCR6", split.by = "tissue", pt.size = 0.01)
FeaturePlot(merged, "MKI67", split.by = "tissue", pt.size = 0.01)
Tregs.markers <- wilcoxauc(merged, "seurat_clusters")
write.csv(Tregs.markers, paste0(save.path, "Integrated.Treg.Cluster.Markers.Wilcox.allRes.csv"))
options(repr.matrix.max.cols=150, repr.matrix.max.rows=200)
Tregs.markers %>%
group_by(group) %>%
filter(padj < 0.05 & auc > 0.6) %>%
arrange(group, desc(logFC)) %>%
#slice_max(n = 40, order_by = logFC)%>%
dplyr::select(feature, group) %>%
mutate(row = row_number()) %>%
tidyr::pivot_wider(names_from = group, values_from = feature) -> Treg.show
write.csv(Treg.show, paste0(save.path, "Integrated.Treg.Cluster.Markers.Wilcox.filt.csv"))
Treg.show[1:40,]
| row | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
|---|---|---|---|---|---|---|---|---|---|---|
| <int> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
| 1 | GAS5 | CD74 | IFITM1 | FOS | TNFRSF18 | FOS | AQP3 | ISG15 | STMN1 | GZMK |
| 2 | MT-ATP8 | IL32 | MT-ATP8 | JUND | CD74 | JUND | CRIP1 | MX1 | GAPDH | IFNG-AS1 |
| 3 | EEF1G | HLA-DRB5 | RPL17 | MT-ATP6 | MT2A | JUN | TLE5 | IFI6 | COTL1 | GZMA |
| 4 | APOO | HLA-DRB1 | EEF1G | JUNB | SRGN | LMNA | CNN2 | LY6E | TUBA1B | CNN2 |
| 5 | SNHG29 | CRIP1 | TXNIP | FOSB | TNFRSF4 | FOSB | KLRB1 | MT2A | TYMS | GIMAP7 |
| 6 | RPL17 | S100A4 | RBFOX2 | IGKC | LGALS1 | JUNB | COTL1 | XAF1 | CD74 | IL10RA |
| 7 | EEF1B2 | HLA-DPB1 | MT-ND4L | RPS10 | CD2 | CREM | CD52 | ISG20 | ENO1 | CXCR3 |
| 8 | TCF7 | HLA-DPA1 | APOO | NR4A2 | MTRNR2L12 | MT-ATP6 | S100A4 | STAT1 | DUT | LYAR |
| 9 | MT-ND4L | ANXA2 | RPS4Y1 | JUN | CTSC | DUSP1 | PFN1 | OAS1 | LGALS1 | FCMR |
| 10 | RBFOX2 | SH3BGRL3 | TLE5 | ZNF331 | S100A4 | RGS1 | S100A11 | EPSTI1 | SLC25A5 | PLAAT4 |
| 11 | FP236383.3 | TLE5 | FP236383.3 | MT-ND4 | HLA-DRB1 | TNFAIP3 | LIME1 | EIF2AK2 | PGAM1 | FOXP1 |
| 12 | PRKCQ-AS1 | LIME1 | SNHG29 | DUSP1 | GAPDH | SRGN | PTPRCAP | TRIM22 | TPI1 | GIMAP4 |
| 13 | RPS12 | LGALS1 | RPL36A | MT-ND1 | MT-ATP6 | NR4A2 | ANXA1 | RNF213 | PFN1 | LIME1 |
| 14 | SNHG6 | CYTOR | SNHG6 | YPEL5 | ENO1 | PPP1R15A | SH3BGRL3 | HERC5 | MYL6 | IKZF3 |
| 15 | CCR7 | LSP1 | PTPRCAP | MT-ND2 | CTLA4 | KLF6 | ARPC1B | MTRNR2L12 | CRIP1 | CST7 |
| 16 | LEF1 | S100A10 | RPL8 | DUSP2 | IL32 | ZNF331 | LIMS1 | MX2 | PKM | FYB1 |
| 17 | RPS13 | PFN1 | LDHB | CD69 | MYL6 | DUSP2 | ANXA2 | CD74 | TUBB | VAMP2 |
| 18 | TXNIP | CD52 | SELL | DNAJA1 | LGALS3 | DNAJA1 | NOSIP | SP100 | ACTB | GIMAP5 |
| 19 | RPL36A | GBP5 | LIME1 | BTG2 | BATF | NEAT1 | LAPTM5 | SAMD9L | ARPC1B | RPS15A |
| 20 | IFITM1 | ITGB1 | RPS8 | SOCS3 | TYMP | HSP90AA1 | CD99 | UBE2L6 | LDHA | NA |
| 21 | RPL32 | SELPLG | EEF1A1 | CXCR4 | LINC01943 | YPEL5 | GPR25 | OASL | MCM5 | NA |
| 22 | RPS5 | EMP3 | EEF2 | SOCS1 | TIGIT | BTG2 | SEPTIN9 | IFITM1 | PCLAF | NA |
| 23 | GIMAP7 | S100A6 | RPS21 | SRGN | CLIC1 | MT-ND4 | CORO1A | IFIT3 | PCNA | NA |
| 24 | RPS8 | HLA-A | GSTK1 | PPP1R15A | NEAT1 | RPS10 | UCP2 | SAT1 | MT2A | NA |
| 25 | GIMAP5 | ARHGDIB | RPL32 | MT-ND3 | TNFRSF1B | MYADM | PPP1CA | BST2 | VIM | NA |
| 26 | RPL30 | CLIC1 | RPL34 | CREM | SPOCK2 | DUSP4 | IL2RG | SP110 | DEK | NA |
| 27 | RPL5 | ARPC1B | RPS12 | HSP90AA1 | TIMP1 | SAT1 | EMP3 | ADAR | MCM7 | NA |
| 28 | PABPC1 | HLA-C | RPLP0 | AREG | CST7 | CD69 | CAPZB | IFIT1 | ACTG1 | NA |
| 29 | RPS21 | ALOX5AP | RPL30 | MT-CO3 | LAG3 | RGCC | CD6 | FOXP3 | GZMA | NA |
| 30 | FOXP1 | COTL1 | RPS5 | RGS1 | HLA-A | IGKC | CD2 | IRF7 | RANBP1 | NA |
| 31 | RPL39 | LINC02694 | RPL39 | MALAT1 | LMNA | CXCR4 | LSP1 | TYMP | H2AFZ | NA |
| 32 | RPL35A | TTC39C | RPS3 | SLC2A3 | PKM | ZFP36 | S100A6 | IFI16 | HLA-DRA | NA |
| 33 | RPS3 | FOXP3 | RPL4 | AES | HLA-DPA1 | SELENOK | MYH9 | LGALS9 | HMGN2 | NA |
| 34 | RPL34 | CD99 | RPL5 | RPS20 | HLA-DRB5 | UBC | ACTG1 | SYNE2 | SNRPB | NA |
| 35 | RPS14 | NIBAN1 | PFN1 | RGCC | S100A11 | AES | ARHGDIB | MT1E | TXN | NA |
| 36 | RPS6 | LGALS3 | RPS15A | DDX3X | DUSP4 | TENT5C | ACTB | UBALD2 | HMGB1 | NA |
| 37 | RPL37 | DOK2 | RPS13 | XIST | JUND | PHLDA1 | ITGB1 | IFI35 | ANXA5 | NA |
| 38 | RPS23 | ACTB | RPS27 | TRAC | RNF213 | NFKBIA | CCR6 | SMCHD1 | ANXA2 | NA |
| 39 | RPS4Y1 | HPGD | RPS25 | STAT3 | FOXP3 | TUBB4B | FLNA | GBP5 | CCL5 | NA |
| 40 | RPS25 | RAC2 | RPL37 | NEAT1 | HLA-DRA | IRF1 | OSTF1 | HLA-A | S100A11 | NA |
fig.size(6,7)
Idents(merged) <- "seurat_clusters"
DimPlot(merged, label = T, repel = T, shuffle = T, label.size = 6) +
scale_color_tableau("Tableau 20") + theme(text = element_text(size = 20))
fig.size(4,12)
Idents(merged) <- "seurat_clusters"
DimPlot(merged, label = T, repel = T, shuffle = T, label.size = 6, split.by = "tissue") +
scale_color_tableau("Tableau 20") + theme(text = element_text(size = 20))
dot.dat <- FetchData(merged, vars = c("IL2RA", "seurat_clusters", "donorID"), slot = "data") %>% group_by(donorID, seurat_clusters) %>%
mutate(Mean=mean(IL2RA)) %>%
ungroup() %>%
group_by(seurat_clusters) %>%
mutate(n = n()) %>%
mutate(Mean=mean(IL2RA)) %>%
mutate(SD = sd(IL2RA)/sqrt(n)) %>%
mutate(zscore = (IL2RA - Mean)/SD) #
dot.dat %>% head
ggplot(dot.dat, aes(x = seurat_clusters, y = zscore)) + geom_boxplot() #+ geom_linerange(aes(ymin = mean_z-sd_z, ymax = mean_z+sd_z))
# dot.dat <- dot.dat %>%
# group_by(seurat_clusters) %>%
# mutate(mean_z=mean(zscore)) %>%
# mutate(sd_z = sd(zscore)/sqrt(n)) %>%
# select(seurat_clusters, mean_z, sd_z, n) %>% arrange(mean_z)
# head(dot.dat)
# ggplot(dot.dat, aes(x = seurat_clusters, y = mean_z)) + geom_point() + geom_linerange(aes(ymin = mean_z-sd_z, ymax = mean_z+sd_z))
| IL2RA | seurat_clusters | donorID | Mean | n | SD | zscore |
|---|---|---|---|---|---|---|
| <dbl> | <fct> | <chr> | <dbl> | <int> | <dbl> | <dbl> |
| 1.1012861 | 5 | 300_0302 | 0.6199521 | 1305 | 0.022935069 | 20.9868108 |
| 1.0437781 | 1 | 300_0302 | 0.7157717 | 3479 | 0.014337075 | 22.8782002 |
| 0.9090327 | 7 | 300_0302 | 0.9241466 | 247 | 0.064771527 | -0.2333423 |
| 0.0000000 | 3 | 300_0302 | 0.2094712 | 2972 | 0.009341831 | -22.4229311 |
| 0.9724182 | 5 | 300_0302 | 0.6199521 | 1305 | 0.022935069 | 15.3679955 |
| 1.7990184 | 0 | 300_0302 | 0.3668333 | 4829 | 0.009074467 | 157.8258163 |
collapsed.counts <- presto::collapse_counts(merged@assays$RNA@counts,
merged@meta.data,
c("seurat_clusters", "donorID"))
dat.plot <- collapsed.counts$meta_data %>% tibble::rownames_to_column() %>%
left_join(as.data.frame(collapsed.counts$exprs_norm["IL2RA",]) %>% tibble::rownames_to_column(), by = c("rowname" ="rowname")) %>%
tibble::column_to_rownames("rowname") %>%
rename("IL2RA" = 'collapsed.counts$exprs_norm["IL2RA", ]')
dat.plot%>% head
ggplot(dat.plot, aes(x = seurat_clusters, y = IL2RA)) + geom_boxplot()
CAREFUL: get_norm makes very strong assumptions about data
| seurat_clusters | donorID | N | logUMI | IL2RA | |
|---|---|---|---|---|---|
| <fct> | <chr> | <int> | <dbl> | <dbl> | |
| sample_205 | 5 | 300_0302 | 23 | 11.779741 | 1.4569859 |
| sample_201 | 1 | 300_0302 | 6 | 10.300584 | 0.6973401 |
| sample_207 | 7 | 300_0302 | 1 | 8.817001 | 0.9090327 |
| sample_203 | 3 | 300_0302 | 8 | 10.679849 | 0.8671973 |
| sample_200 | 0 | 300_0302 | 2 | 9.374328 | 1.6570391 |
| sample_206 | 6 | 300_0302 | 2 | 9.173054 | 0.0000000 |
dat.plot <- collapsed.counts$meta_data %>% tibble::rownames_to_column() %>%
left_join(as.data.frame(collapsed.counts$exprs_norm["FOXP3",]) %>% tibble::rownames_to_column(), by = c("rowname" ="rowname")) %>%
tibble::column_to_rownames("rowname") %>%
rename("FOXP3" = 'collapsed.counts$exprs_norm["FOXP3", ]')
dat.plot%>% head
ggplot(dat.plot, aes(x = seurat_clusters, y = FOXP3)) + geom_boxplot()
| seurat_clusters | donorID | N | logUMI | FOXP3 | |
|---|---|---|---|---|---|
| <fct> | <chr> | <int> | <dbl> | <dbl> | |
| sample_205 | 5 | 300_0302 | 23 | 11.779741 | 1.0157278 |
| sample_201 | 1 | 300_0302 | 6 | 10.300584 | 0.9860676 |
| sample_207 | 7 | 300_0302 | 1 | 8.817001 | 1.9355253 |
| sample_203 | 3 | 300_0302 | 8 | 10.679849 | 0.7655572 |
| sample_200 | 0 | 300_0302 | 2 | 9.374328 | 1.2658927 |
| sample_206 | 6 | 300_0302 | 2 | 9.173054 | 0.0000000 |
fig.size(3, 12)
FeaturePlot(merged, features = c("TCF7", "LEF1", "BACH2", "SATB1"), ncol = 4)
FeaturePlot(merged, features = c("PRDM1", "MAF", "IKZF3", "IKZF2"), ncol = 4)
FeaturePlot(merged, features = c("CCR7", "IL6R", "SELL", "S1PR1"), ncol = 4)
c0.c1.c2 <- wilcoxauc(merged, groups_use = c(0,1,2), group_by = "seurat_clusters")
options(repr.matrix.max.cols=150, repr.matrix.max.rows=200)
c0.c1.c2 %>%
group_by(group) %>%
filter(padj < 0.05 & auc > 0.5) %>%
arrange(group, desc(logFC)) %>%
#slice_max(n = 40, order_by = logFC)%>%
dplyr::select(feature, group) %>%
mutate(row = row_number()) %>%
tidyr::pivot_wider(names_from = group, values_from = feature) -> c0.c1.c2.show
c0.c1.c2.show[1:30,]
| row | 0 | 1 | 2 |
|---|---|---|---|
| <int> | <chr> | <chr> | <chr> |
| 1 | TCF7 | CD74 | S100A4 |
| 2 | CCR7 | S100A4 | CRIP1 |
| 3 | EEF1B2 | HLA-DRB1 | IFITM1 |
| 4 | GAS5 | IL32 | S100A11 |
| 5 | PRKCQ-AS1 | HLA-DRB5 | CRIP2 |
| 6 | RPS12 | HLA-DPA1 | SH3BGRL3 |
| 7 | SNHG29 | CRIP1 | ZFP36L2 |
| 8 | GIMAP7 | HLA-DPB1 | ANXA1 |
| 9 | RPS5 | LGALS1 | PTGER2 |
| 10 | RPS13 | ANXA2 | KLF6 |
| 11 | RPL32 | SH3BGRL3 | TIGIT |
| 12 | APOO | CYTOR | IFITM2 |
| 13 | GIMAP5 | S100A10 | GSTK1 |
| 14 | RPS14 | CLIC1 | IL32 |
| 15 | PABPC1 | SRGN | ITGB1 |
| 16 | RPL5 | S100A6 | S100A10 |
| 17 | RPL38 | COTL1 | AL136456.1 |
| 18 | RPS6 | HLA-DRA | DUSP1 |
| 19 | TXK | LGALS3 | ITGA4 |
| 20 | RPL30 | HLA-A | TAGLN2 |
| 21 | RPL35A | ITGB1 | JUNB |
| 22 | RPS21 | GBP5 | RPS26 |
| 23 | RPL39 | MYL6 | PFN1 |
| 24 | RPS8 | ALOX5AP | FCRL3 |
| 25 | FOXP1 | ARPC1B | GAPDH |
| 26 | MT-ATP8 | EZR | EMP3 |
| 27 | RPS20 | EMP3 | GPR183 |
| 28 | LEF1 | AHNAK | VIM |
| 29 | RPLP2 | LSP1 | GATA3 |
| 30 | RPL22 | VIM | RPL36A |
c0.c2 <- wilcoxauc(merged, groups_use = c(0,2), group_by = "seurat_clusters")
options(repr.matrix.max.cols=150, repr.matrix.max.rows=200)
c0.c2 %>%
group_by(group) %>%
filter(padj < 0.05 & auc > 0.6) %>%
arrange(group, desc(logFC)) %>%
#slice_max(n = 40, order_by = logFC)%>%
dplyr::select(feature, group) %>%
mutate(row = row_number()) %>%
tidyr::pivot_wider(names_from = group, values_from = feature) -> c0.c2.show
c0.c2.show[1:30,]
| row | 0 | 2 |
|---|---|---|
| <int> | <chr> | <chr> |
| 1 | CCR7 | S100A4 |
| 2 | TCF7 | CRIP1 |
| 3 | PRKCQ-AS1 | IL32 |
| 4 | GAS5 | SH3BGRL3 |
| 5 | EEF1B2 | S100A11 |
| 6 | FOXP1 | S100A10 |
| 7 | TGFBR2 | ITGB1 |
| 8 | TXK | CD74 |
| 9 | RPS12 | ANXA2 |
| 10 | RPS13 | CLIC1 |
| 11 | SNHG29 | EMP3 |
| 12 | PABPC1 | SRGN |
| 13 | RPS5 | KLF6 |
| 14 | RPL31 | VIM |
| 15 | NPM1 | AHNAK |
| 16 | RPL32 | ARPC1B |
| 17 | RPS14 | PFN1 |
| 18 | MALAT1 | HLA-A |
| 19 | RPL35A | GAPDH |
| 20 | RPL38 | S100A6 |
| 21 | RPL5 | COTL1 |
| 22 | RPL7 | TAGLN2 |
| 23 | RPL30 | EZR |
| 24 | RPS23 | MYL6 |
| 25 | RPS6 | GSTK1 |
| 26 | RPL22 | CD99 |
| 27 | RPLP2 | YWHAB |
| 28 | RPS3A | LSP1 |
| 29 | RPS9 | CFL1 |
| 30 | RPL37 | ACTB |
c2.c1 <- wilcoxauc(merged, groups_use = c(1,2), group_by = "seurat_clusters")
options(repr.matrix.max.cols=150, repr.matrix.max.rows=200)
c2.c1 %>%
group_by(group) %>%
filter(padj < 0.05 & auc > 0.6) %>%
arrange(group, desc(logFC)) %>%
#slice_max(n = 40, order_by = logFC)%>%
dplyr::select(feature, group) %>%
mutate(row = row_number()) %>%
tidyr::pivot_wider(names_from = group, values_from = feature) -> c2.c1.show
c2.c1.show[1:20,]
| row | 1 | 2 |
|---|---|---|
| <int> | <chr> | <chr> |
| 1 | CD74 | EEF1B2 |
| 2 | HLA-DRB1 | IFITM1 |
| 3 | HLA-DRB5 | RPS12 |
| 4 | HLA-DPA1 | RPL36A |
| 5 | HLA-DPB1 | RPL32 |
| 6 | LGALS1 | RPS5 |
| 7 | HLA-DRA | SNHG29 |
| 8 | IL32 | RPS21 |
| 9 | CYTOR | RPS8 |
| 10 | ANXA2 | GIMAP7 |
| 11 | S100A4 | TCF7 |
| 12 | LGALS3 | RPL39 |
| 13 | GBP5 | RPS6 |
| 14 | CTSC | RPL10A |
| 15 | CLIC1 | GAS5 |
| 16 | ALOX5AP | RPS29 |
| 17 | MT2A | RPL5 |
| 18 | SRGN | RPL3 |
| 19 | S100A6 | RPS13 |
| 20 | FOXP3 | RPLP0 |
c4.c5 <- wilcoxauc(merged, groups_use = c(4,5), group_by = "seurat_clusters")
options(repr.matrix.max.cols=150, repr.matrix.max.rows=200)
c4.c5 %>%
group_by(group) %>%
filter(padj < 0.05 & auc > 0.6) %>%
arrange(group, desc(logFC)) %>%
#slice_max(n = 40, order_by = logFC)%>%
dplyr::select(feature, group) %>%
mutate(row = row_number()) %>%
tidyr::pivot_wider(names_from = group, values_from = feature) -> c4.c5.show
c4.c5.show[1:20,]
| row | 4 | 5 |
|---|---|---|
| <int> | <chr> | <chr> |
| 1 | CD74 | FOS |
| 2 | TNFRSF18 | JUNB |
| 3 | GAPDH | JUN |
| 4 | MT2A | FOSB |
| 5 | PKM | DUSP1 |
| 6 | TNFRSF4 | NR4A2 |
| 7 | LAG3 | BTG2 |
| 8 | IL2RG | CXCR4 |
| 9 | UCP2 | ZNF331 |
| 10 | PGAM1 | CD69 |
| 11 | HLA-DRB5 | KLF2 |
| 12 | ENO1 | YPEL5 |
| 13 | CTSC | DNAJB1 |
| 14 | SPOCK2 | RGCC |
| 15 | MTRNR2L12 | TNFAIP3 |
| 16 | PFN1 | ZFP36L2 |
| 17 | MYL6 | KLF6 |
| 18 | LINC01943 | DNAJA1 |
| 19 | TNFRSF1B | JUND |
| 20 | ARPC1B | RPS10 |
table(merged$seurat_clusters)
0 1 2 3 4 5 6 7 8 9 4829 3479 3074 2972 1745 1305 877 247 236 145
#naming clusters
cell.states <- merged$seurat_clusters
cell.states <- cell.states %>% if_else(.=="0", "Naive Treg - TCF7/CCR7", .) %>%
if_else(.=="2", "CD25-intermediate Treg", .) %>%
if_else(.=="1", "CD25-high Treg", .) %>%
if_else(.=="3", "AREG Treg", .) %>%
if_else(.=="4", "CD25-high CXCR6+ Treg", .) %>%
if_else(.=="5", "TNFa response Treg", .) %>%
if_else(.=="6", "CD161+ memory Treg", .) %>%
if_else(.=="7", "ISG high Treg", .) %>%
if_else(.=="8", "Proliferating", .) %>%
if_else(.=="9", "GZM+ Treg", .)
names(cell.states) <- rownames(merged@meta.data)
merged <- AddMetaData(merged, cell.states, "cell.states")
fig.size(10,12)
Idents(merged) <- "cell.states"
DimPlot(merged, group.by = "cell.states", label = T, label.box = T,
shuffle = T, label.size = 5, pt.size = 0.5) +
theme(text = element_text(size = 20)) + scale_fill_tableau("Tableau 20") +
scale_color_tableau("Tableau 20")
Scale for fill is already present. Adding another scale for fill, which will replace the existing scale.
table(merged$cell.states, merged$tissue)
PBMC.Ctrl PBMC.RA SFMC Syn
AREG Treg 109 83 84 2696
CD161+ memory Treg 492 172 112 101
CD25-high CXCR6+ Treg 93 82 615 955
CD25-high Treg 1500 958 416 605
CD25-intermediate Treg 1746 903 118 307
GZM+ Treg 54 58 15 18
ISG high Treg 49 41 90 67
Naive Treg - TCF7/CCR7 2911 1464 50 404
Proliferating 35 15 46 140
TNFa response Treg 37 36 117 1115
fig.size(8,8)
sum.clust <- merged@meta.data %>% tibble::rownames_to_column(var = "cell.ID") %>%
dplyr::select(cell.ID, cell.states, tissue, orig.ident)
pt <- table(sum.clust$cell.states, sum.clust$tissue)
pt
pt %>% data.frame() %>% setNames(c("cluster", "tissue", "Freq")) %>%
ggplot(aes(x = tissue, y = Freq, fill = cluster)) + scale_fill_tableau("Tableau 20") + theme_bw(base_size = 20) + theme(axis.text.x = element_text(angle = 45, hjust = 1))-> pl
pl + geom_col(position = "stack")
pl + geom_col(position = "fill")
pt %>% data.frame() %>% setNames(c("cluster", "tissue", "Freq")) %>%
ggplot(aes(x = cluster, y = Freq, fill = tissue)) + scale_fill_tableau("Tableau 20") + theme_bw(base_size = 20) + theme(axis.text.x = element_text(angle = 45, hjust = 1))-> pl
pl + geom_col(position = "stack")
pl + geom_col(position = "fill")
PBMC.Ctrl PBMC.RA SFMC Syn
AREG Treg 109 83 84 2696
CD161+ memory Treg 492 172 112 101
CD25-high CXCR6+ Treg 93 82 615 955
CD25-high Treg 1500 958 416 605
CD25-intermediate Treg 1746 903 118 307
GZM+ Treg 54 58 15 18
ISG high Treg 49 41 90 67
Naive Treg - TCF7/CCR7 2911 1464 50 404
Proliferating 35 15 46 140
TNFa response Treg 37 36 117 1115
saveRDS(merged, "/data/brennerlab/Shani/projects/Treg/analysis/integrated/integrated.Tregs.rds")
# merged <- readRDS("/data/brennerlab/Shani/projects/Treg/analysis/integrated/integrated.Tregs.rds")